# ==================================
#
#  Code for replicating:
# "Positioning Under Alternative Electoral Systems: Evidence From Japanese Candidate Election Manifestos"
#  Amy Catalinac, NYU
#
# ==================================



# ==================================
# Regression to test for structural break
# sample (all candidates pre-ER and LDP, NFP, and DPJ candidates post-ER)
# (results in the first two columns of Table 1 of the Online Appendix)

load("districts_dispersion_prepostER.Rdata") # called dat1a

# variance = dispersion scores for each district (from "dispersion_in_districts_preER_all.csv"  
# and "dispersion_in_districts_postER_majorcands.csv")
# er = a dummy coded 1 for post-ER, 0 for pre-ER
# time = coded 1 (1986) ... 8 (2009)
# elec_2009 = a dummy for 2009
# district = electoral-system specific districts (merged "ku" with "er" indicator)

dat1a$er <- as.factor(dat1a$er)
dat1a$elec_2009 <- as.factor(dat1a$elec_2009)
dat1a$district <- as.factor(dat1a$district)

cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

# "Interaction" model in column 1, Table 1 in Appendix 
fit1a <- lm(variance ~ time + er + time*er, data=dat1a)
fit1a.cl <- cl(dat1a, fit1a, dat1a$district)
print(fit1a.cl)
cat("\n R^2=",summary(fit1a)$r.squared,"\n")
cat("\n adj R^2=",summary(fit1a)$adj.r.squared,"\n")

# (+ controls) model in column 2, Table 1 in Appendix
fit2a <- lm(variance ~ time + er + time*er + prefecture + district + elec_2009, data=dat1a)
fit2a.cl <- cl(dat1a, fit2a, dat1a$district)
print(fit2a.cl)
cat("\n R^2=",summary(fit2a)$r.squared,"\n")
cat("\n adj R^2=",summary(fit2a)$adj.r.squared,"\n")

rm(list=ls())




# ==================================
# Chow test
# sample (all candidates pre-ER and LDP, NFP, and DPJ candidates post-ER)

load("districts_dispersion_prepostER.Rdata") # called dat1a

# order dat1a by time (identify last row pertaining to old electoral system):
both5 <- dat1a[order(dat1a$time), ]
both5 <- cbind(index(both5),both5) # 387th row is the last observation under old electoral system:
require(strucchange)
sctest(variance ~ time, type = "Chow", point = 387, data = both5)

rm(list=ls())





# ==================================
# Regression to test for structural break
# sample (competitive candidates pre-ER and LDP, NFP, and DPJ candidates post-ER)
# (second two columns of Table 1 of the Online Appendix)

load("districts_dispersion_prepostER_comp.Rdata") # called dat1b

# variance = dispersion scores for each district (from "dispersion_in_districts_preER_compcands.csv"  
# and "dispersion_in_districts_postER_majorcands.csv")
# er = a dummy coded 1 for post-ER, 0 for pre-ER
# time = coded 1 (1986) ... 8 (2009)
# elec_2009 = a dummy for 2009
# district = electoral-system specific districts (merged "ku" with "er" indicator)

dat1b$er <- as.factor(dat1b$er)
dat1b$elec_2009 <- as.factor(dat1b$elec_2009)
dat1b$district <- as.factor(dat1b$district)

cl   <- function(dat,fm, cluster){
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL) }

# "Interaction" model in column 3, Table 1 in Appendix 
fit1b <- lm(variance ~ time + er + time*er, data=dat1b)
fit1b.cl <- cl(dat1b, fit1b, dat1b$district)
print(fit1b.cl)
cat("\n R^2=",summary(fit1b)$r.squared,"\n")
cat("\n adj R^2=",summary(fit1b)$adj.r.squared,"\n")

# (+ controls) model in column 4, Table 1 in Appendix
fit2b <- lm(variance ~ time + er + time*er + prefecture + district + elec_2009, data=dat1b)
fit2b.cl <- cl(dat1b, fit2b, dat1b$district)
print(fit2b.cl)
cat("\n R^2=",summary(fit2b)$r.squared,"\n")
cat("\n adj R^2=",summary(fit2b)$adj.r.squared,"\n")

rm(list=ls())



# ==================================
# Chow test
# sample (competitive candidates pre-ER and LDP, NFP, and DPJ candidates post-ER)

load("districts_dispersion_prepostER_comp.Rdata") # called dat1b

# Order dat1b by time - identify the last row pertaining to old system:
both6 <- dat1b[order(dat1b$time), ]
both6 <- cbind(index(both6),both6) # 384th row is the last observation under old electoral system:
require(strucchange)
sctest(variance ~ time, type = "Chow", point = 384, data = both6)

rm(list=ls())







# ==================================
# Predicted Values
# (Figure 2 in the paper)

# load district-level dispersion calculated with all cands prior to ER and LDP, NFP, and DPJ cands post-ER
load("districts_dispersion_prepostER.Rdata") # called dat1a
dat1a$er <- as.factor(dat1a$er)
dat1a$elec_2009 <- as.factor(dat1a$elec_2009)
dat1a$district <- as.factor(dat1a$district)

# load district-level dispersion calculated only with competitive cands prior to ER
load("districts_dispersion_prepostER_comp.Rdata") # called dat1b
dat1b$er <- as.factor(dat1b$er)
dat1b$elec_2009 <- as.factor(dat1b$elec_2009)
dat1b$district <- as.factor(dat1b$district)

fit2a.f <- lm(variance ~ time + er + time*er + elec_2009, data=dat1a)
fit2b.f <- lm(variance ~ time + er + time*er + elec_2009, data=dat1b)
nd <- data.frame(time=c(1:8), er=as.factor(c(0,0,0,1,1,1,1,1)), elec_2009=as.factor(c(0,0,0,0,0,0,0,1)) )
predictions1 <- predict.lm(fit2a.f, nd, interval="confidence")
predictions2 <- predict.lm(fit2b.f, nd, interval="confidence")

par(mfrow = c(1, 2), mar = c(4,4,2,1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)
years <- c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009)
plot(years, predictions1[,1],  pch=19, main="", xlab="Year", 
     ylab="Predicted Values", cex.lab = 1.5, cex.axis=1.5, xaxt="n", ylim=c(0, 2.5))
axis(1, at=c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009), cex.axis=1.5)
arrows(years[1], predictions1[1,2], years[1], predictions1[1,3], code = 3, lwd=1, angle=90)
arrows(years[2], predictions1[2,2], years[2], predictions1[2,3], code = 3, lwd=1, angle=90)
arrows(years[3], predictions1[3,2], years[3], predictions1[3,3], code = 3, lwd=1, angle=90)
arrows(years[4], predictions1[4,2], years[4], predictions1[4,3], code = 3, lwd=1, angle=90)
arrows(years[5], predictions1[5,2], years[5], predictions1[5,3], code = 3, lwd=1, angle=90)
arrows(years[6], predictions1[6,2], years[6], predictions1[6,3], code = 3, lwd=1, angle=90)
arrows(years[7], predictions1[7,2], years[7], predictions1[7,3], code = 3, lwd=1, angle=90)
arrows(years[8], predictions1[8,2], years[8], predictions1[8,3], code = 3, lwd=1, angle=90)

plot(years,predictions2[,1], pch=19, main="", xlab="Year", ylab="Predicted Values", 
     cex.lab = 1.5, cex.axis=1.5, xaxt="n", ylim=c(0, 2.5))
axis(1, at=c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009), cex.axis=1.5)
arrows(years[1], predictions2[1,2], years[1], predictions2[1,3], code = 3, lwd=1, angle=90)
arrows(years[2], predictions2[2,2], years[2], predictions2[2,3], code = 3, lwd=1, angle=90)
arrows(years[3], predictions2[3,2], years[3], predictions2[3,3], code = 3, lwd=1, angle=90)
arrows(years[4], predictions2[4,2], years[4], predictions2[4,3], code = 3, lwd=1, angle=90)
arrows(years[5], predictions2[5,2], years[5], predictions2[5,3], code = 3, lwd=1, angle=90)
arrows(years[6], predictions2[6,2], years[6], predictions2[6,3], code = 3, lwd=1, angle=90)
arrows(years[7], predictions2[7,2], years[7], predictions2[7,3], code = 3, lwd=1, angle=90)
arrows(years[8], predictions2[8,2], years[8], predictions2[8,3], code = 3, lwd=1, angle=90)

rm(list=ls())










# ==================================
# Predicted Values
# (Figure 7 in Online Appendix)

# load district-level dispersion calculated with all cands prior to ER and LDP, NFP, and DPJ cands post-ER
load("districts_dispersion_prepostER.Rdata") # called dat1a
dat1a$er <- as.factor(dat1a$er)
dat1a$elec_2009 <- as.factor(dat1a$elec_2009)
dat1a$district <- as.factor(dat1a$district)

# load district-level dispersion calculated only with competitive cands prior to ER
load("districts_dispersion_prepostER_comp.Rdata") # called dat1b
dat1b$er <- as.factor(dat1b$er)
dat1b$elec_2009 <- as.factor(dat1b$elec_2009)
dat1b$district <- as.factor(dat1b$district)

fit2a.f3 <- lm(variance ~ time + er + time*er + prefecture + district + elec_2009, data=dat1a)
fit2b.f3 <- lm(variance ~ time + er + time*er + prefecture + district + elec_2009, data=dat1b)

new.data2 <- data.frame(district = rep(levels(dat1a$district)[1],8) , 
                        prefecture = rep(levels(dat1a$prefecture)[1],8),
                        time=c(1:8), 
                        er=as.factor(c(0,0,0,1,1,1,1,1)), 
                        elec_2009=as.factor(c(0,0,0,0,0,0,0,1)) )
pred1 <- predict(fit2a.f3, newdata=new.data2, interval= "confidence" ) 
pred2 <- predict(fit2b.f3, newdata=new.data2, interval= "confidence" )
# NB: will give warning about aliased values

par(mfrow = c(1, 2), mar = c(4,4,2,1), tcl = -0.25, mgp = c(1.75, 0.6, 0),
    font.main = 1, cex.main = 2)
years <- c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009)
plot(years, pred1[,1],  pch=19, main="", xlab="Year", ylab="Predicted Values", cex.lab = 1.5, xaxt="n", ylim=c(-1, 4.2))
axis(1, at=c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009), cex.axis=1.5)
arrows(years[1], pred1[1,2], years[1], pred1[1,3], code = 3, lwd=1, angle=90)
arrows(years[2], pred1[2,2], years[2], pred1[2,3], code = 3, lwd=1, angle=90)
arrows(years[3], pred1[3,2], years[3], pred1[3,3], code = 3, lwd=1, angle=90)
arrows(years[4], pred1[4,2], years[4], pred1[4,3], code = 3, lwd=1, angle=90)
arrows(years[5], pred1[5,2], years[5], pred1[5,3], code = 3, lwd=1, angle=90)
arrows(years[6], pred1[6,2], years[6], pred1[6,3], code = 3, lwd=1, angle=90)
arrows(years[7], pred1[7,2], years[7], pred1[7,3], code = 3, lwd=1, angle=90)
arrows(years[8], pred1[8,2], years[8], pred1[8,3], code = 3, lwd=1, angle=90)

plot(years,pred2[,1], pch=19, main="", xlab="Year", ylab="Predicted Values", cex.lab = 1.5,xaxt="n", ylim=c(-1, 4.2))
axis(1, at=c(1986, 1990, 1993, 1996, 2000, 2003, 2005, 2009), cex.axis=1.5)
arrows(years[1], pred2[1,2], years[1], pred2[1,3], code = 3, lwd=1, angle=90)
arrows(years[2], pred2[2,2], years[2], pred2[2,3], code = 3, lwd=1, angle=90)
arrows(years[3], pred2[3,2], years[3], pred2[3,3], code = 3, lwd=1, angle=90)
arrows(years[4], pred2[4,2], years[4], pred2[4,3], code = 3, lwd=1, angle=90)
arrows(years[5], pred2[5,2], years[5], pred2[5,3], code = 3, lwd=1, angle=90)
arrows(years[6], pred2[6,2], years[6], pred2[6,3], code = 3, lwd=1, angle=90)
arrows(years[7], pred2[7,2], years[7], pred2[7,3], code = 3, lwd=1, angle=90)
arrows(years[8], pred2[8,2], years[8], pred2[8,3], code = 3, lwd=1, angle=90)

rm(list=ls())
